* Reset settings and initialize log file
* (launch is a project-defined command not shown in this file; presumably it
*  opens the log under the given path -- confirm against its definition)
launch, path("share/occ_flows")

*-------------------------------------------------------------------------------
* Price and Wasserman (2024), "The Summer Drop in Female Employment"
*
* Description: Trace seasonal shifts in the employment-to-population ratio
*              (EPOP) to specific job types.
*-------------------------------------------------------------------------------


* Prepare data and run models
*-------------------------------------------------------------------------------

* Build the analysis sample, save job characteristics, and estimate monthly
* flow regressions. Skipped entirely when the global $estimate equals "0".
if "$estimate" != "0" {
	* Load data on adult individuals
	gzuse "$basepath/data/derived/cps_bms_sample.dta.gz", clear

	* Retain variables we need
	keep pid tm year month wtfinl wtraked tmspline* weeks female emp school ind1990 occ1990 ptemp hours uhrsworkt

	* De-populate schooling, industry, and occupation for non-employed individuals
	replace school = . if emp == 0
	replace ind1990 = . if emp == 0
	replace occ1990 = . if emp == 0

	* Assign occupation-industry pairs to job categories
	* (project command; it is expected to create the variable job used below)
	classify_jobs

	* Store a list of job types (levelsof skips missing values of job)
	quietly levelsof job, local(jobs)

	* Record lagged status
	* NOTE(review): L. and F. rely on panel/time settings stored with the loaded
	* dataset; no xtset/tsset is issued before this point -- confirm.
	gen l_emp = L.emp
	gen l_job = L.job

	* Snapshot the person-level data so it can be restored after the collapse
	tempfile core
	save `core'

	* Record job characteristics among men observed outside of summer
	* Separation is only defined when next month's status is observed:
	* (F.emp == 0) evaluates to 0 when F.emp is missing, so without the
	* !missing() guard person-months with no follow-up record would be
	* counted as "remained employed" and bias the separation rate downward.
	gen byte sep_rate = 100 * (F.emp == 0) if emp == 1 & !missing(F.emp)
	gen byte pt_rate = 100 * (ptemp == 1) if emp == 1
	keep if female == 0 & emp == 1 & !inrange(month, 6, 8)
	gcollapse (mean) sep_rate pt_rate hours uhrsworkt [pw = wtfinl], by(job)
	compress
	save "$basepath/models/occ_flows/job_chars.dta", replace
	use `core', clear

	* Restrict to linked observations
	keep if !missing(wtraked)

	* Measure inflows (f) and outflows (s), overall and by job type
	* Suffix j0 is the aggregate across all job types
	gen byte f_j0 = (l_emp == 0 & emp == 1)
	gen byte s_j0 = (l_emp == 1 & emp == 0)

	foreach j in `jobs' {
		* Inflows are attributed to the job entered, outflows to the job left
		gen byte f_j`j' = f_j0 * (job == `j')
		gen byte s_j`j' = s_j0 * (l_job == `j')
	}

	* Aggregate flows across individuals into one row per gender and period
	gcollapse (mean) f_j* s_j* (first) month tmspline* weeks (rawsum) wtraked [pw = wtraked], by(female tm)
	tsset female tm

	* Run specifications: one regression per gender and flow variable, with
	* May (month 5) as the omitted month category
	foreach f of numlist 0 1 {
		foreach yvar of varlist f_j* s_j* {
			quietly ivreg2 `yvar' ib5.month D.tmspline* D.weeks if female == `f' [aw = wtraked], bw($bandwidth) robust small
			* Project command; the next section loads results named
			* f<gender>_<flow>_j<job>.ster from models/occ_flows
			process_estimates, path("occ_flows") model("f`f'_`yvar'")
		}
	}
}


* Create a dataset of estimates
*-------------------------------------------------------------------------------

* Convert the stored estimates into a single dataset of monthly coefficients.
* Runs under the same $estimate condition as the estimation block above and
* relies on the local `jobs' that block defines.
if "$estimate" != "0" {
	* Prepare coefficient labels for each month
	make_coeflabels

	* Turn into a dataset: one temporary file per gender x flow x job type
	foreach f of numlist 0 1 {
		foreach x in "f" "s" {
			foreach j in 0 `jobs' {
				* Load estimates
				quietly estimates use "$basepath/models/occ_flows/f`f'_`x'_j`j'.ster"

				* Convert the estimates into a dataset
				* (columns of e(b) become variables; those named coef# are
				*  stacked into long form with m holding the numeric suffix)
				matrix B = e(b)
				clear
				quietly svmat B, names(col)
				quietly gen byte g = `f'
				quietly gen byte j = `j'
				quietly gen yvar = "`x'"
				quietly reshape long coef, i(g j yvar) j(m)

				* Compute excess flows, relative to the average month
				* NOTE(review): m is labelled "0 = May" below, yet the average
				* is taken over m in 1..12 -- confirm the coefficient numbering
				* produced by process_estimates.
				summarize coef if inrange(m, 1, 12), meanonly
				quietly replace coef = coef - r(mean)
				rename coef value

				* Store for stacking
				tempfile f`f'_`x'_j`j'
				quietly save "`f`f'_`x'_j`j''"
			}
		}
	}

	* Stack all estimates
	clear
	foreach f of numlist 0 1 {
		foreach x in "f" "s" {
			foreach j in 0 `jobs' {
				append using "`f`f'_`x'_j`j''"
			}
		}
	}

	* Reshape so finding (f) and separation (s) values sit side by side
	reshape wide value, i(g j m) j(yvar) string
	rename value* *

	* Label variables
	label variable g "Gender (0 = men, 1 = women)"
	label variable j "Job type (0 = aggregate)"
	label variable m "Month (0 = May)"
	label variable f "Share finding employment (non-E => E)"
	label variable s "Share separating from employment (E => non-E)"

	* Save to disk
	order g j m f s
	save "$basepath/models/occ_flows/coefficients.dta", replace
}


* Compute flows
*-------------------------------------------------------------------------------

* Read the stacked coefficient dataset from disk
use "$basepath/models/occ_flows/coefficients.dta", clear

* Attach job-type value labels to j
classify_jobs, labels_only
label values j job_lbl

* Cumulate net excess flows (finding minus separating, in percentage points)
* within each gender x job type, ordered by m. The first row of each group
* contributes zero, so the running total at m == 2 covers the rows after it.
sort g j m
by g j: gen net_epop = sum(cond(_n > 1, 100 * (f - s), 0))

* Keep only the cumulative value at m == 2 (the May-to-July change)
drop if m != 2
keep g j net_epop


* Plot the May-July change in employment by occupation
*-------------------------------------------------------------------------------

* Horizontal bar chart for women (g == 1) of net_epop across job types 1-23,
* with bars ordered by net_epop
graph hbar net_epop if g == 1 & inrange(j, 1, 23), ///
	over(j, label(labsize(*0.65)) sort(net_epop) gap(*1.5)) ///
	ytitle("May{&minus}July change in employment (p.p.)", size(medsmall)) ///
	ylabel(, labsize(medsmall))

* Export the figure (nicepdf is a project-defined command)
nicepdf "$basepath/output/occ_flows.pdf", indirect replace


* Compare contributions to the summer drop with measures of job flexibility
*-------------------------------------------------------------------------------

* Keep women only and discard the aggregate row (j == 0)
drop if g != 1 | j == 0

* Bring in the job characteristics saved by the estimation step; every job
* type must be present on both sides or the merge stops with an error
rename j job
merge 1:1 job using "$basepath/models/occ_flows/job_chars.dta", assert(match) nogenerate
format %9.3f net_epop
format %9.1f sep_rate pt_rate

* Focus on the non-education sector (job codes up to 23)
drop if job > 23

* Rank job types from the most negative May-July change upward and show the
* first ten
sort net_epop
list job net_epop in 1/10

* Flag the six job types at the top of that ranking
gen byte contributor = (_n <= 6)

* Contrast the flagged job types with the remaining ones, listing the flagged
* rows in descending order of part-time rate
gsort -pt_rate
list job sep_rate pt_rate hours uhrsworkt if contributor == 1, sep(0)
summarize sep_rate pt_rate hours uhrsworkt if contributor == 0

* Close the log file
unlaunch
